0: Preparation

Defining the input and output files

Loading libraries

library(knitr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.2

Causative variant

Variant fixation

Fixation probability

Fixation time

# Function to determine the number of rows until fixation is reached
time_to_fixation <- function(data, threshold = 0.9) {
  # Find the index of the first row where the allele frequency is 0.9 or higher
  fixation_index <- which(data$allele_frequency >= threshold)[1]
  
  # Return the number of rows until fixation is reached
  return(fixation_index - 1)
}

Summary - Variant fixation

Selection_coefficient Fixation_probability Avg_Fixation_time Min_fixation_time Max_fixation_time
1 s=0.05 0.2 33.80 27 39
3 s=0.2 16.3 32.40 22 39
2 s=0.1 1.5 31.85 18 39
4 s=0.3 27.8 27.55 18 39
5 s=0.4 40.8 20.95 15 29
6 s=0.5 40.0 16.65 10 26
7 s=0.6 35.7 11.45 8 16
8 s=0.7 46.5 10.25 7 13
9 s=0.8 45.5 7.45 4 10

Causative variant windows

Variant position

Window lengths

Causative variant windows

Summary - Causative variant windows

Selection_coefficient Avg_Length_Mb Avg_window_freq Min_freq Max_freq Avg_freq_variant_100k_window
1 s=0.05 3.655 71.0 32 100 71.9
3 s=0.2 4.430 78.9 34 100 79.3
4 s=0.3 4.435 83.1 38 100 84.0
5 s=0.4 4.625 78.1 32 100 78.2
2 s=0.1 4.775 73.2 12 100 73.9
6 s=0.5 5.040 92.9 56 100 93.6
7 s=0.6 5.875 87.2 48 100 88.0
8 s=0.7 6.220 93.2 36 100 94.4
9 s=0.8 7.895 88.9 28 100 89.9

Standard Error Confidence interval function

Confidence level <=> konfidensgrad

# Function to calculate standard error and its confidence interval
standard_error_confidence_interval_fun <- function(observed_data, confidence_level = 0.95) {
  
  # Calculate standard error
  n <- length(observed_data)
  standard_deviation <- sd(observed_data)
  standard_error <- standard_deviation / sqrt(n - 1)
  
  # Calculate confidence interval based on standard error

  alpha <- (1 - confidence_level) / 2
  margin_of_error <- qnorm(1 - alpha) * standard_error
  mean_estimate <- mean(observed_data)
  # Calculate the percentiles for the confidence interval
  confidence_interval_lower_bound <- mean_estimate - margin_of_error # 2.5th percentile (2σ)
  confidence_interval_upper_bound <- mean_estimate + margin_of_error # 97.5th percentile (2σ)
  
  # Return confidence interval
  return(c(confidence_interval_lower_bound, confidence_interval_upper_bound))
}





# # Function to calculate bootstrap confidence intervals
# standard_error_confidence_interval_fun <- function(observed_data, n_bootstraps = 1000, confidence_level = 0.95) {
#   
#   # Function to calculate the mean for each bootstrap sample
#   compute_bootstrap_mean_fun <- function(observed_data_urn) {
#     bootstrap_dataset <- sample(observed_data_urn, replace = TRUE)
#     bootstrap_estimate <- mean(bootstrap_dataset)
#     return(bootstrap_estimate)
#   }
#   
#   # Perform bootstrap resampling
#   bootstrap_point_estimates <- replicate(n_bootstraps, compute_bootstrap_mean_fun(observed_data))
#   
#   # Calculate the percentiles for the confidence interval
#   alpha <- (1 - confidence_level) / 2
#   confidence_interval_lower_bound <- quantile(bootstrap_point_estimates, alpha) # 2.5th percentile
#   confidence_interval_upper_bound <- quantile(bootstrap_point_estimates, 1 - alpha) # 97.5th percentile
#   
#   # Return the confidence interval
#   return(c(confidence_interval_lower_bound, confidence_interval_upper_bound))
# }

1: ROH-Frequency

1.1 : Autosome ROH-frequencies

1.1.1 : Empirical - ROH frequency

1.1.2: Neutral Model - ROH frequency

1.1.3: Selection Model

1.1.4 Summary - ROH-hotspot threshold

## ROH-hotspot selection testing results://n
Model Avg_ROH_hotspot_threshold
Neutral 40.0
s=0.1 42.6
s=0.3 45.1
s=0.05 46.6
s=0.2 47.0
s=0.4 47.0
s=0.6 52.0
s=0.5 54.2
s=0.7 58.2
s=0.8 60.6
Empirical 75.0

1.2 ROH-hotspots - ROH Frequency and Length

2: Inbreeding coefficient

2.1 Empirical data

## Overall Average Avg_F_ROH for  german_shepherd : 0.2753012 //n

2.2 Neutral Model

## Point estimate F_ROH across all 20 simulations: 0.3231565 //n
## [1] "Bootstrap 95% Confidence Interval: [0.302707382298313, 0.343605637701687]"

2.3 Selection Model

## Point estimate F_ROH across all 20 simulations for  selection_model_s005_chr3 : 0.3789376 //n[1] "Bootstrap 95% Confidence Interval: [0.356936709259486, 0.400938510740514]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s01_chr3 : 0.3475749 //n[1] "Bootstrap 95% Confidence Interval: [0.329352061370301, 0.365797778629699]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s02_chr3 : 0.3852102 //n[1] "Bootstrap 95% Confidence Interval: [0.360506697169921, 0.409913642830079]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s03_chr3 : 0.382973 //n[1] "Bootstrap 95% Confidence Interval: [0.356057264547997, 0.409888755452003]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s04_chr3 : 0.4022861 //n[1] "Bootstrap 95% Confidence Interval: [0.379804687863133, 0.424767512136866]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s05_chr3 : 0.438943 //n[1] "Bootstrap 95% Confidence Interval: [0.407964226284795, 0.469921813715205]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s06_chr3 : 0.4301045 //n[1] "Bootstrap 95% Confidence Interval: [0.398231547108479, 0.461977472891521]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s07_chr3 : 0.4466082 //n[1] "Bootstrap 95% Confidence Interval: [0.419672277441459, 0.473544062558542]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s08_chr3 : 0.4687358 //n[1] "Bootstrap 95% Confidence Interval: [0.432117907312471, 0.505353732687529]"

2.4 F_ROH summary

Model F_ROH Lower_CI Upper_CI
Empirical 0.275 NA NA
Neutral 0.323 0.303 0.344
s=0.1 0.348 0.329 0.366
s=0.05 0.379 0.357 0.401
s=0.3 0.383 0.356 0.410
s=0.2 0.385 0.361 0.410
s=0.4 0.402 0.380 0.425
s=0.6 0.430 0.398 0.462
s=0.5 0.439 0.408 0.470
s=0.7 0.447 0.420 0.474
s=0.8 0.469 0.432 0.505

2.4.1 Visualizing F_ROH

## Models where empirical F_ROH is within CI:
## character(0)
## 
## Models where empirical F_ROH is outside CI:
##  [1] "s=0.05"  "s=0.1"   "s=0.2"   "s=0.3"   "s=0.4"   "s=0.5"   "s=0.6"  
##  [8] "s=0.7"   "s=0.8"   "Neutral"

3: Expected Heterozygosity

3.1 Empirical data

3.2 Neutral Model

3.3 Selection Model

## Uncommented because change of analysis

3.4 Causative Variant Window

## Point estimate H_e across all 20 simulations for  s005_chr3 : 0.03835051 //n[1] "Bootstrap 95% Confidence Interval: [0.0286911574700695, 0.0480098532362818]"
## Point estimate H_e across all 20 simulations for  s01_chr3 : 0.03615789 //n[1] "Bootstrap 95% Confidence Interval: [0.0226773044355698, 0.0496384684079676]"
## Point estimate H_e across all 20 simulations for  s02_chr3 : 0.0249121 //n[1] "Bootstrap 95% Confidence Interval: [0.0147684851908972, 0.0350557107187597]"
## Point estimate H_e across all 20 simulations for  s03_chr3 : 0.02055552 //n[1] "Bootstrap 95% Confidence Interval: [0.00764693794749041, 0.0334641051013026]"
## Point estimate H_e across all 20 simulations for  s04_chr3 : 0.02878277 //n[1] "Bootstrap 95% Confidence Interval: [0.0129018766653808, 0.044663667853679]"
## Point estimate H_e across all 20 simulations for  s05_chr3 : 0.01151372 //n[1] "Bootstrap 95% Confidence Interval: [0.00244507213179341, 0.0205823727315342]"
## Point estimate H_e across all 20 simulations for  s06_chr3 : 0.01601255 //n[1] "Bootstrap 95% Confidence Interval: [0.00661909993119089, 0.0254060099286539]"
## Point estimate H_e across all 20 simulations for  s07_chr3 : 0.008196857 //n[1] "Bootstrap 95% Confidence Interval: [0.00329575703966468, 0.0130979577437562]"
## Point estimate H_e across all 20 simulations for  s08_chr3 : 0.01370687 //n[1] "Bootstrap 95% Confidence Interval: [0.00367441728198638, 0.0237393233295754]"

3.4 Genomewide 5th percentile comparison - Expected Heterozygosity Summary

Model H_e_5th_percentile Lower_CI Upper_CI
s005_chr3 0 0 0
s01_chr3 0 0 0
s02_chr3 0 0 0
s03_chr3 0 0 0
s04_chr3 0 0 0
s05_chr3 0 0 0
s06_chr3 0 0 0
s07_chr3 0 0 0
s08_chr3 0 0 0
Neutral 0 0 0
Empirical NA NA NA

4: Summary

4.0 General comparison

4.0.1 ROH-hotspot threshold comparison

## 
##  ROH-hotspot threshold comparison between the different datasets
Model Avg_ROH_hotspot_threshold
Neutral 40.0
s=0.1 42.6
s=0.3 45.1
s=0.05 46.6
s=0.2 47.0
s=0.4 47.0
s=0.6 52.0
s=0.5 54.2
s=0.7 58.2
s=0.8 60.6
Empirical 75.0

4.0.2 F_ROH comparison

Model F_ROH Lower_CI Upper_CI
Empirical 0.275 NA NA
Neutral 0.323 0.303 0.344
s=0.1 0.348 0.329 0.366
s=0.05 0.379 0.357 0.401
s=0.3 0.383 0.356 0.410
s=0.2 0.385 0.361 0.410
s=0.4 0.402 0.380 0.425
s=0.6 0.430 0.398 0.462
s=0.5 0.439 0.408 0.470
s=0.7 0.447 0.420 0.474
s=0.8 0.469 0.432 0.505

4.1: Hotspot comparison

4.1.1: Selection test (Sweep test)

## [1] "Selection test results"
## [1] "ROH-hotspot windows with an mean H_e Value lower or equal to the lower confidence interval of the fifth percentile of the neutral model are classified as being under selection"
## [1] "5th percentile of the neutral model is: 0"
Name Under_selection Window_based_Average_H_e
Hotspot_chr3_window_1 No 0.10706
Hotspot_chr3_window_3 No 0.12371
Hotspot_chr3_window_2 No 0.14453
Hotspot_chr17_window_2 No 0.14866
Hotspot_chr17_window_1 No 0.15285
Hotspot_chr19_window_1 No 0.16270
## [1] "ROH-hotspots under selection:"
Name Under_selection Window_based_Average_H_e

4.1.2: Selection Strength Test (Sweep test)

4.1.1.1 Extracting Causative windows under selection

Model H_e Lower_CI Upper_CI Under_Selection
Neutral 0.00000 0.00000 0.00000 No
s=0.7 0.00820 0.00330 0.01310 No
s=0.5 0.01151 0.00245 0.02058 No
s=0.8 0.01371 0.00367 0.02374 No
s=0.6 0.01601 0.00662 0.02541 No
s=0.3 0.02056 0.00765 0.03346 No
s=0.2 0.02491 0.01477 0.03506 No
s=0.4 0.02878 0.01290 0.04466 No
s=0.1 0.03616 0.02268 0.04964 No
s=0.05 0.03835 0.02869 0.04801 No

4.1.1.1 Extracting Hotspots under selection

*** Behöver bootstrap av 5th percentiles från neutral model

Hotspot - Causative Window - Comparison

Model Lengths_Mb ROH_Freq H_e Under_selection
Hotspot_chr3_window_1 10.900 81.3 0.10706 No
s=0.8 7.895 88.9 0.01371 No
s=0.7 6.220 93.2 0.00820 No
s=0.6 5.875 87.2 0.01601 No
s=0.5 5.040 92.9 0.01151 No
s=0.1 4.775 73.2 0.03616 No
s=0.4 4.625 78.1 0.02878 No
s=0.3 4.435 83.1 0.02056 No
s=0.2 4.430 78.9 0.02491 No
Hotspot_chr19_window_1 4.400 75.6 0.16270 No
s=0.05 3.655 71.0 0.03835 No
Hotspot_chr3_window_2 3.200 76.3 0.12371 No
Hotspot_chr3_window_3 2.700 77.6 0.14453 No
Hotspot_chr17_window_1 2.000 76.4 0.14866 No
Hotspot_chr17_window_2 0.600 76.1 0.15285 No

Visualizing and scaling

# Min-max-scaling normalization function, that normalizes a vector of values
min_max_normalization_fun <- function(x) {
  return((x - min(x)) / (max(x) - min(x)))
}
# Install and load the scatterplot3d package
# install.packages("scatterplot3d")
library(scatterplot3d)
## Warning: package 'scatterplot3d' was built under R version 4.3.1
### Min max scaling ###
# Normalize Lengths_Mb
Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb_Normalized <- min_max_normalization_fun(Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb)
# Normalize ROH Frequency
Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq_Normalized <- min_max_normalization_fun(Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq)
# Normalize H_e
Hotspots_and_Causative_windows_comparison_sorted$H_e_Normalized <- min_max_normalization_fun(Hotspots_and_Causative_windows_comparison_sorted$H_e)

# Modify the labels by removing "Hotspot_" prefix and "_window" suffix from hotspot models
Hotspots_and_Causative_windows_comparison_sorted$Label <- ifelse(
  grepl("Hotspot", Hotspots_and_Causative_windows_comparison_sorted$Model), 
  gsub("Hotspot_|_window", "", Hotspots_and_Causative_windows_comparison_sorted$Model), 
  Hotspots_and_Causative_windows_comparison_sorted$Model
)

# Create a new column for identifying the type
Hotspots_and_Causative_windows_comparison_sorted$Type <- ifelse(
  grepl("Hotspot", Hotspots_and_Causative_windows_comparison_sorted$Model), 
  "Hotspot", 
  "Selection Coefficient"
)

plot_title <- paste("Length And Average ROH % Comparison - ROH Hotspots & Causative Windows")

# Create the 3D scatter plot
s3d <- scatterplot3d(
  Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb_Normalized,
  Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq_Normalized,
  Hotspots_and_Causative_windows_comparison_sorted$H_e_Normalized,
  color = ifelse(Hotspots_and_Causative_windows_comparison_sorted$Type == "Hotspot", "red", "blue"),
  pch = 19, # Solid circle
  xlab = "Normalized Lengths",
  ylab = "Normalized Mean ROH Frequency",
  zlab = "Normalized H_e",
  main = plot_title
)

# Add labels to the points
s3d.coords <- s3d$xyz.convert(
  Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb_Normalized,
  Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq_Normalized,
  Hotspots_and_Causative_windows_comparison_sorted$H_e_Normalized
)
text(s3d.coords$x, s3d.coords$y, 
     labels = Hotspots_and_Causative_windows_comparison_sorted$Label, 
     pos = 3, cex = 0.5) # pos=3 means above

5 Visualizing Expected Heterozygoisty

5.1 Bootstrap CI for mean 5th percentile H_e

## Models where empirical H_e is within CI for Hotspot_chr3_window_1 :
## character(0)
## 
## Models where empirical H_e is outside CI for Hotspot_chr3_window_1 :
## character(0)

## Models where empirical H_e is within CI for Hotspot_chr3_window_3 :
## character(0)
## 
## Models where empirical H_e is outside CI for Hotspot_chr3_window_3 :
## character(0)

## Models where empirical H_e is within CI for Hotspot_chr3_window_2 :
## character(0)
## 
## Models where empirical H_e is outside CI for Hotspot_chr3_window_2 :
## character(0)

## Models where empirical H_e is within CI for Hotspot_chr17_window_2 :
## character(0)
## 
## Models where empirical H_e is outside CI for Hotspot_chr17_window_2 :
## character(0)

## Models where empirical H_e is within CI for Hotspot_chr17_window_1 :
## character(0)
## 
## Models where empirical H_e is outside CI for Hotspot_chr17_window_1 :
## character(0)

## Models where empirical H_e is within CI for Hotspot_chr19_window_1 :
## character(0)
## 
## Models where empirical H_e is outside CI for Hotspot_chr19_window_1 :
## character(0)

5.2 5th percentile of the extreme H_e values